/*
 * Copyright (c) 2012-2013 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/* Gallium pipe driver
 */
#include "etna_pipe.h"
#include "etna_translate.h"

#include <etnaviv/common.xml.h>
#include <etnaviv/state.xml.h>
#include <etnaviv/state_3d.xml.h>
#include <etnaviv/cmdstream.xml.h>
#include <etnaviv/viv.h>
#include <etnaviv/etna.h>

#include "etna_blend.h"
#include "etna_clear_blit.h"
#include "etna_compiler.h"
#include "etna_debug.h"
#include "etna_fence.h"
#include "etna_rasterizer.h"
#include "etna_resource.h"
#include "etna_shader.h"
#include "etna_surface.h"
#include "etna_texture.h"
#include "etna_transfer.h"
#include "etna_zsa.h"

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "util/u_math.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_prim.h"

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <stdarg.h>
#include <assert.h>
#include <math.h>
#include <errno.h>

/*********************************************************************/
/* Context handling */

#define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */

/* Create bit field that specifies which samplers are active and thus need to be programmed
 * 32 bits is enough for 32 samplers. As far as I know this is the upper bound supported on any Vivante hw
 * up to GC4000.
 */
static uint32_t active_samplers_bits(struct pipe_context *pipe)
{
    struct etna_pipe_context *restrict e = etna_pipe_context(pipe);
    /* A sampler slot is only active when both a sampler state and a sampler
     * view are bound for it, so take the per-stage minimum of the two counts. */
    unsigned frag_count = MIN2(e->num_fragment_samplers, e->num_fragment_sampler_views);
    unsigned vert_count = MIN2(e->num_vertex_samplers, e->num_vertex_sampler_views);
    /* Fragment samplers occupy the low bits; vertex samplers start at the
     * hardware-specific vertex_sampler_offset. */
    uint32_t mask = etna_bits_ones(frag_count);
    mask |= etna_bits_ones(vert_count) << e->specs.vertex_sampler_offset;
    return mask;
}

/* Reset / re-upload context.
 *
 * This pushes the current register state in pipe->gpu3d to the GPU.
 * The function is used to initialize the GPU in a predictable state
 * at the beginning of rendering, as well as to create a context
 * buffer for the kernel driver.
 */
static void reset_context(struct pipe_context *restrict pipe)
{
    struct etna_pipe_context *restrict e = etna_pipe_context(pipe);
    struct etna_ctx *restrict ctx = e->ctx;

/* Unconditionally emit one register from the cached state in e->gpu3d.
 * The trailing 0/1 argument of ETNA_COALESCE_STATE_UPDATE selects whether the
 * value is converted to fixed-point before emission. */
#define EMIT_STATE(state_name, dest_field) \
    ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 0)

#define EMIT_STATE_FIXP(state_name, dest_field) \
    ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 1)

    uint32_t last_reg, last_fixp, span_start; /* locals required by the COALESCE macros */
    ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE);
    /* multi sample config is set first, and outside of the normal sorting
     * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
     * possibly PS.TEMP_REGISTER_CONTROL).
     */
    /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG);
    /* below code generated by gen_weave_state.py, keep this in sync with sync_context! */
    /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly
     *    or indirectly */
    for(int x=0; x<e->gpu3d.num_vertex_elements; ++x)
    {
        /*00600*/ EMIT_STATE(FE_VERTEX_ELEMENT_CONFIG(x), FE_VERTEX_ELEMENT_CONFIG[x]);
    }
    /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR);
    /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL);
    /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR);
    /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL);
    for(int x=0; x<8; ++x)
    {
        /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x]);
    }
    for(int x=0; x<8; ++x)
    {
        /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x]);
    }
    /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC);
    /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT);
    /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT);
    /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL);
    for(int x=0; x<4; ++x)
    {
        /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x]);
    }
    for(int x=0; x<4; ++x)
    {
        /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x]);
    }
    /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING);
    /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC);
    if (e->specs.has_shader_range_registers)
    {
        /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE);
    }
    /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X);
    /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y);
    /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z);
    /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X);
    /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y);
    /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z);
    /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH);
    /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE);
    /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE);
    /*00A2C*/ EMIT_STATE(PA_W_CLIP_LIMIT, PA_W_CLIP_LIMIT);
    /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT);
    /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG);
    for(int x=0; x<10; ++x)
    {
        /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x]);
    }
    /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT);
    /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP);
    /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT);
    /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM);
    /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, SE_DEPTH_SCALE);
    /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS);
    /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG);
    /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL);
    /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04);
    /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, RA_EARLY_DEPTH);
    for(int x=0; x<4; ++x)
    {
        /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x]);
    }
    for(int x=0; x<16; ++x)
    {
        /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x]);
    }
    /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC);
    /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG);
    /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT);
    /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL);
    /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL);
    /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC);
    if (e->specs.has_shader_range_registers)
    {
        /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE);
    }
    /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG);
    /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR);
    /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR);
    /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE);
    /* Single-pipe GPUs use the combined PE_DEPTH_ADDR/PE_COLOR_ADDR registers;
     * multi-pipe GPUs use the per-pipe address arrays emitted further below. */
    if (ctx->conn->chip.pixel_pipes == 1)
    {
        /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR);
    }
    /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE);
    /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP);
    /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG);
    /*01420*/ EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP);
    /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR);
    /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG);
    /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT);
    if (ctx->conn->chip.pixel_pipes == 1)
    {
        /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR);
    }
    /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE);
    /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL);
    if (ctx->conn->chip.pixel_pipes != 1)
    {
        for(int x=0; x<ctx->conn->chip.pixel_pipes; ++x)
        {
            /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(x), PE_PIPE_COLOR_ADDR[x]);
        }
        for(int x=0; x<ctx->conn->chip.pixel_pipes; ++x)
        {
            /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(x), PE_PIPE_DEPTH_ADDR[x]);
        }
    }
    /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT);
    /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP);
    for(int x=0; x<2; ++x)
    {
        /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x]);
    }
    /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG);
    /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE);
    /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE);
    /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE);
    /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE);
    /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE);
    /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE);
    for(int x=0; x<12; ++x)
    {
        /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), TE_SAMPLER_CONFIG0[x]);
    }
    for(int x=0; x<12; ++x)
    {
        /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), TE_SAMPLER_SIZE[x]);
    }
    for(int x=0; x<12; ++x)
    {
        /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE[x]);
    }
    for(int x=0; x<12; ++x)
    {
        /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), TE_SAMPLER_LOD_CONFIG[x]);
    }
    for(int x=0; x<12; ++x)
    {
        /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), TE_SAMPLER_CONFIG1[x]);
    }
    for(int y=0; y<14; ++y)
    {
        for(int x=0; x<12; ++x)
        {
            /*02400*/ EMIT_STATE(TE_SAMPLER_LOD_ADDR(x, y), TE_SAMPLER_LOD_ADDR[y][x]);
        }
    }
    /*03814*/ EMIT_STATE(GL_VERTEX_ELEMENT_CONFIG, GL_VERTEX_ELEMENT_CONFIG);
    /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS);
    /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS);
    for(int x=0; x<2; ++x)
    {
        /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x]);
    }
    /*0384C*/ EMIT_STATE(GL_API_MODE, GL_API_MODE);
    ETNA_COALESCE_STATE_CLOSE();
    /* end only EMIT_STATE */
#undef EMIT_STATE
#undef EMIT_STATE_FIXP
    /* re-submit current shader program and uniforms */
    /*04000 or 0C000*/
    etna_set_state_multi(ctx, e->specs.vs_offset, e->gpu3d.vs_inst_mem_size, e->gpu3d.VS_INST_MEM);
    /*06000 or 0D000*/
    etna_set_state_multi(ctx, e->specs.ps_offset, e->gpu3d.ps_inst_mem_size, e->gpu3d.PS_INST_MEM);
    /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->gpu3d.vs_uniforms_size, e->gpu3d.VS_UNIFORMS);
    /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->gpu3d.ps_uniforms_size, e->gpu3d.PS_UNIFORMS);
}

/* Weave state before draw operation. This function merges all the compiled state blocks under
 * the context into one device register state. Parts of this state that are changed since
 * last call (dirty) will be uploaded as state changes in the command buffer.
 */
static void sync_context(struct pipe_context *restrict pipe)
{
    struct etna_pipe_context *restrict e = etna_pipe_context(pipe);
    struct etna_ctx *restrict ctx = e->ctx;
    uint32_t active_samplers = active_samplers_bits(pipe);
    uint32_t dirty = e->dirty_bits; /* bitmask of CSO groups changed since last sync */

    /* CSOs must be bound before calling this */
    assert(e->blend_p && e->rasterizer_p && e->depth_stencil_alpha_p && e->vertex_elements_p);

    /* Pre-processing: re-link shader if needed.
     */
    if(unlikely((dirty & ETNA_STATE_SHADER)) && e->vs && e->fs)
    {
        /* re-link vs and fs if needed */
        etna_link_shaders(pipe, &e->shader_state, e->vs, e->fs);
    }

    /* Pre-processing: see what caches we need to flush before making state
     * changes.
     */
    uint32_t to_flush = 0;
    if(unlikely(dirty & (ETNA_STATE_BLEND)))
    {
        /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE.
         */
        if((e->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) !=
           (e->blend.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE))
            to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
    }
    if(unlikely(dirty & (ETNA_STATE_TEXTURE_CACHES)))
        to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) /* Framebuffer config changed? */
        to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
    if(DBG_ENABLED(ETNA_DBG_CFLUSH_ALL))
        to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
    if(to_flush)
    {
        /* Emit the flush, then stall RA against PE so the flush completes
         * before subsequent state changes take effect. */
        etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, to_flush);
        etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
    }

    /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader
     * state to make sure it is always rewritten. */
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER)))
    {
        if((e->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) !=
           (e->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK))
        {
            /* XXX what does the GPU set these states to on MSAA samples change? Does it do the right thing?
             * (increase/decrease as necessary) or something else? Just set some invalid value until we know for
             * sure. */
            e->gpu3d.PS_INPUT_COUNT = 0xffffffff;
            e->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff;
        }
    }

    /*
     * Cached state update emission.
     * The etna_3d_state structure e->gpu3d is used to keep the current context.
     * State is only emitted if the new value of the register is different from the cached value
     * in the context. Update the state afterwards.
     */
#define EMIT_STATE(state_name, dest_field, src_value) \
    if(e->gpu3d.dest_field != (src_value)) { \
        ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 0) \
        e->gpu3d.dest_field = (src_value); \
    }

#define EMIT_STATE_FIXP(state_name, dest_field, src_value) \
    if(e->gpu3d.dest_field != (src_value)) { \
        ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 1) \
        e->gpu3d.dest_field = (src_value); \
    }

    /* Update vertex elements. This is different from any of the other states, in that
     * a) the number of vertex elements written matters: so write only active ones
     * b) the vertex element states must all be written: do not skip entries that stay the same
     */
    if(dirty & (ETNA_STATE_VERTEX_ELEMENTS))
    {
        if(e->gpu3d.num_vertex_elements != e->vertex_elements.num_elements ||
           memcmp(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->gpu3d.num_vertex_elements * 4))
        {
            /* Special case: vertex elements must always be sent in full if changed */
            /*00600*/ etna_set_state_multi(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), e->vertex_elements.num_elements, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG);
            memcpy(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.num_elements * 4);

            e->gpu3d.num_vertex_elements = e->vertex_elements.num_elements;
        }
    }

    /* The following code is originally generated by gen_merge_state.py, to
     * emit state in increasing order of address (this makes it possible to merge
     * consecutive register updates into one SET_STATE command)
     *
     * There have been some manual changes, where the weaving operation is not
     * simply bitwise or:
     * - scissor fixp
     * - num vertex elements
     * - scissor handling
     * - num samplers
     * - texture lod
     * - ETNA_STATE_TS
     * - removed ETNA_STATE_BASE_SETUP statements -- these are guaranteed to not change anyway
     * - PS / framebuffer interaction for MSAA
     * - move update of GL_MULTI_SAMPLE_CONFIG first
     * - add unlikely()/likely()
     */
    uint32_t last_reg, last_fixp, span_start; /* locals required by the COALESCE macros */
    ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE);
    /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly
     *    or indirectly */
    /* multi sample config is set first, and outside of the normal sorting
     * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
     * possibly PS.TEMP_REGISTER_CONTROL).
     */
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_SAMPLE_MASK)))
    {
        /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG, e->sample_mask.GL_MULTI_SAMPLE_CONFIG | e->framebuffer.GL_MULTI_SAMPLE_CONFIG);
    }
    if(likely(dirty & (ETNA_STATE_INDEX_BUFFER)))
    {
        /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR, e->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
        /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL, e->index_buffer.FE_INDEX_STREAM_CONTROL);
    }
    if(likely(dirty & (ETNA_STATE_VERTEX_BUFFERS)))
    {
        /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR, e->vertex_buffer[0].FE_VERTEX_STREAM_BASE_ADDR);
        /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL, e->vertex_buffer[0].FE_VERTEX_STREAM_CONTROL);
        /* NOTE(review): the multi-stream FE registers below are gated on
         * has_shader_range_registers, which by its name is a shader-PC feature
         * flag; reset_context emits these registers unconditionally. Verify
         * this is the intended feature check for multiple vertex streams. */
        if (e->specs.has_shader_range_registers)
        {
            for(int x=0; x<8; ++x)
            {
                /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x], e->vertex_buffer[x].FE_VERTEX_STREAM_BASE_ADDR);
            }
            for(int x=0; x<8; ++x)
            {
                /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x], e->vertex_buffer[x].FE_VERTEX_STREAM_CONTROL);
            }
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC, e->shader_state.VS_END_PC);
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_RASTERIZER)))
    {
        /* Output count differs depending on whether point size is written per-vertex */
        /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT,
                e->rasterizer.point_size_per_vertex ? e->shader_state.VS_OUTPUT_COUNT_PSIZE : e->shader_state.VS_OUTPUT_COUNT);
    }
    if(unlikely(dirty & (ETNA_STATE_VERTEX_ELEMENTS | ETNA_STATE_SHADER)))
    {
        /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(e->vertex_elements.num_elements) | e->shader_state.VS_INPUT_COUNT);
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL, e->shader_state.VS_TEMP_REGISTER_CONTROL);
        for(int x=0; x<4; ++x)
        {
            /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x], e->shader_state.VS_OUTPUT[x]);
        }
        for(int x=0; x<4; ++x)
        {
            /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x], e->shader_state.VS_INPUT[x]);
        }
        /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING, e->shader_state.VS_LOAD_BALANCING);
        /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC, e->shader_state.VS_START_PC);
        if (e->specs.has_shader_range_registers)
        {
            /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE, (e->shader_state.vs_inst_mem_size/4-1)<<16);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_VIEWPORT)))
    {
        /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X, e->viewport.PA_VIEWPORT_SCALE_X);
        /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y, e->viewport.PA_VIEWPORT_SCALE_Y);
        /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z, e->viewport.PA_VIEWPORT_SCALE_Z);
        /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X, e->viewport.PA_VIEWPORT_OFFSET_X);
        /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y, e->viewport.PA_VIEWPORT_OFFSET_Y);
        /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z, e->viewport.PA_VIEWPORT_OFFSET_Z);
    }
    if(unlikely(dirty & (ETNA_STATE_RASTERIZER)))
    {
        /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH, e->rasterizer.PA_LINE_WIDTH);
        /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE, e->rasterizer.PA_POINT_SIZE);
        /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE, e->rasterizer.PA_SYSTEM_MODE);
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT, e->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
    }
    if(unlikely(dirty & (ETNA_STATE_RASTERIZER | ETNA_STATE_SHADER)))
    {
        /* Weave here is bitwise AND, not OR: both CSOs may mask out features */
        /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG, e->rasterizer.PA_CONFIG & e->shader_state.PA_CONFIG);
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        for(int x=0; x<10; ++x)
        {
            /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x], e->shader_state.PA_SHADER_ATTRIBUTES[x]);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SCISSOR | ETNA_STATE_FRAMEBUFFER | ETNA_STATE_RASTERIZER | ETNA_STATE_VIEWPORT)))
    {
        /* this is a bit of a mess: rasterizer.scissor determines whether to use only the
         * framebuffer scissor, or specific scissor state, and the viewport clips too so the logic
         * spans four CSOs
         */
        uint32_t scissor_left = MAX2(e->framebuffer.SE_SCISSOR_LEFT, e->viewport.SE_SCISSOR_LEFT);
        uint32_t scissor_top = MAX2(e->framebuffer.SE_SCISSOR_TOP, e->viewport.SE_SCISSOR_TOP);
        uint32_t scissor_right = MIN2(e->framebuffer.SE_SCISSOR_RIGHT, e->viewport.SE_SCISSOR_RIGHT);
        uint32_t scissor_bottom = MIN2(e->framebuffer.SE_SCISSOR_BOTTOM, e->viewport.SE_SCISSOR_BOTTOM);
        if(e->rasterizer.scissor)
        {
            /* Intersect with the user scissor rectangle as well */
            scissor_left = MAX2(e->scissor.SE_SCISSOR_LEFT, scissor_left);
            scissor_top = MAX2(e->scissor.SE_SCISSOR_TOP, scissor_top);
            scissor_right = MIN2(e->scissor.SE_SCISSOR_RIGHT, scissor_right);
            scissor_bottom = MIN2(e->scissor.SE_SCISSOR_BOTTOM, scissor_bottom);
        }
        /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT, scissor_left);
        /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP, scissor_top);
        /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT, scissor_right);
        /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM, scissor_bottom);
    }
    if(unlikely(dirty & (ETNA_STATE_RASTERIZER)))
    {
        /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, SE_DEPTH_SCALE, e->rasterizer.SE_DEPTH_SCALE);
        /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS, e->rasterizer.SE_DEPTH_BIAS);
        /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG, e->rasterizer.SE_CONFIG);
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL, e->shader_state.RA_CONTROL);
    }
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER)))
    {
        /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04, e->framebuffer.RA_MULTISAMPLE_UNK00E04);
        for(int x=0; x<4; ++x)
        {
            /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x], e->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
        }
        for(int x=0; x<16; ++x)
        {
            /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x], e->framebuffer.RA_CENTROID_TABLE[x]);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_FRAMEBUFFER)))
    {
        /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC, e->shader_state.PS_END_PC);
        /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG, e->shader_state.PS_OUTPUT_REG);
        /* MSAA needs alternate PS input/temp configuration, hence the
         * framebuffer dependency of these two states */
        /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT,
                e->framebuffer.msaa_mode ?
                    e->shader_state.PS_INPUT_COUNT_MSAA :
                    e->shader_state.PS_INPUT_COUNT);
        /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL,
                e->framebuffer.msaa_mode ?
                    e->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA :
                    e->shader_state.PS_TEMP_REGISTER_CONTROL);
        /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL, e->shader_state.PS_CONTROL);
        /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC, e->shader_state.PS_START_PC);
        if (e->specs.has_shader_range_registers)
        {
            /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE, ((e->shader_state.ps_inst_mem_size/4-1+0x100)<<16) | 0x100);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_FRAMEBUFFER)))
    {
        /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG, e->depth_stencil_alpha.PE_DEPTH_CONFIG | e->framebuffer.PE_DEPTH_CONFIG);
    }
    if(unlikely(dirty & (ETNA_STATE_VIEWPORT)))
    {
        /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR, e->viewport.PE_DEPTH_NEAR);
        /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR, e->viewport.PE_DEPTH_FAR);
    }
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER)))
    {
        /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE, e->framebuffer.PE_DEPTH_NORMALIZE);

        /* Single pixel pipe uses the combined depth address register;
         * the per-pipe variants are written further below for multi-pipe GPUs */
        if (ctx->conn->chip.pixel_pipes == 1)
        {
            /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR, e->framebuffer.PE_DEPTH_ADDR);
        }

        /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE, e->framebuffer.PE_DEPTH_STRIDE);
    }
    if(unlikely(dirty & (ETNA_STATE_DSA)))
    {
        /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP, e->depth_stencil_alpha.PE_STENCIL_OP);
    }
    if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_STENCIL_REF)))
    {
        /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG, e->depth_stencil_alpha.PE_STENCIL_CONFIG | e->stencil_ref.PE_STENCIL_CONFIG);
    }
    if(unlikely(dirty & (ETNA_STATE_DSA)))
    {
        /*01420*/ EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP, e->depth_stencil_alpha.PE_ALPHA_OP);
    }
    if(unlikely(dirty & (ETNA_STATE_BLEND_COLOR)))
    {
        /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR, e->blend_color.PE_ALPHA_BLEND_COLOR);
    }
    if(unlikely(dirty & (ETNA_STATE_BLEND)))
    {
        /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG, e->blend.PE_ALPHA_CONFIG);
    }
    if(unlikely(dirty & (ETNA_STATE_BLEND | ETNA_STATE_FRAMEBUFFER)))
    {
        /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT, e->blend.PE_COLOR_FORMAT | e->framebuffer.PE_COLOR_FORMAT);
    }
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER)))
    {
        if (ctx->conn->chip.pixel_pipes == 1)
        {
            /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR, e->framebuffer.PE_COLOR_ADDR);
            /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE);
            /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL);
        }
        else if (ctx->conn->chip.pixel_pipes == 2)
        {
            /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE);
            /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL);
            /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(0), PE_PIPE_COLOR_ADDR[0], e->framebuffer.PE_PIPE_COLOR_ADDR[0]);
            /*01464*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(1), PE_PIPE_COLOR_ADDR[1], e->framebuffer.PE_PIPE_COLOR_ADDR[1]);
            /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(0), PE_PIPE_DEPTH_ADDR[0], e->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
            /*01484*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(1), PE_PIPE_DEPTH_ADDR[1], e->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
        }
        /* NOTE(review): pixel_pipes > 2 is silently ignored here, while
         * reset_context loops over all pipes -- confirm whether >2 pipes can
         * occur on supported hardware. */
    }
    if(unlikely(dirty & (ETNA_STATE_STENCIL_REF)))
    {
        /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT, e->stencil_ref.PE_STENCIL_CONFIG_EXT);
    }
    if(unlikely(dirty & (ETNA_STATE_BLEND)))
    {
        /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP, e->blend.PE_LOGIC_OP);
        for(int x=0; x<2; ++x)
        {
            /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x], e->blend.PE_DITHER[x]);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_TS)))
    {
        /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG, e->framebuffer.TS_MEM_CONFIG);
        /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE, e->framebuffer.TS_COLOR_STATUS_BASE);
        /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE, e->framebuffer.TS_COLOR_SURFACE_BASE);
        /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE, e->framebuffer.TS_COLOR_CLEAR_VALUE);
        /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE, e->framebuffer.TS_DEPTH_STATUS_BASE);
        /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE, e->framebuffer.TS_DEPTH_SURFACE_BASE);
        /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE, e->framebuffer.TS_DEPTH_CLEAR_VALUE);
    }
    if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS)))
    {
        for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
        {
            /* set active samplers to their configuration value (determined by both the sampler state and sampler view),
             * set inactive sampler config to 0 */
            /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), TE_SAMPLER_CONFIG0[x],
                    ((1<<x) & active_samplers)?(
                        (e->sampler[x].TE_SAMPLER_CONFIG0 & e->sampler_view[x].TE_SAMPLER_CONFIG0_MASK) |
                        e->sampler_view[x].TE_SAMPLER_CONFIG0):0);
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS)))
    {
        for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
        {
            if((1<<x) & active_samplers)
            {
                /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), TE_SAMPLER_SIZE[x], e->sampler_view[x].TE_SAMPLER_SIZE);
            }
        }
        for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
        {
            if((1<<x) & active_samplers)
            {
                /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE[x], e->sampler_view[x].TE_SAMPLER_LOG_SIZE);
            }
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS)))
    {
        for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
        {
            if((1<<x) & active_samplers)
            {
                /* min and max lod is determined both by the sampler and the view */
                /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), TE_SAMPLER_LOD_CONFIG[x],
                        e->sampler[x].TE_SAMPLER_LOD_CONFIG |
                        VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(e->sampler[x].max_lod, e->sampler_view[x].max_lod)) |
                        VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(e->sampler[x].min_lod, e->sampler_view[x].min_lod)));
            }
        }
        for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
        {
            if((1<<x) & active_samplers)
            {
                /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), TE_SAMPLER_CONFIG1[x],
                        e->sampler[x].TE_SAMPLER_CONFIG1 | e->sampler_view[x].TE_SAMPLER_CONFIG1);
            }
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS)))
    {
        for(int y=0; y<VIVS_TE_SAMPLER_LOD_ADDR__LEN; ++y)
        {
            for(int x=0; x<VIVS_TE_SAMPLER__LEN; ++x)
            {
                if((1<<x) & active_samplers)
                {
                    /*02400*/ EMIT_STATE(TE_SAMPLER_LOD_ADDR(x, y), TE_SAMPLER_LOD_ADDR[y][x], e->sampler_view[x].TE_SAMPLER_LOD_ADDR[y]);
                }
            }
        }
    }
    if(unlikely(dirty & (ETNA_STATE_SHADER)))
    {
        /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS, e->shader_state.GL_VARYING_TOTAL_COMPONENTS);
        /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS, e->shader_state.GL_VARYING_NUM_COMPONENTS);
        for(int x=0; x<2; ++x)
        {
            /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x], e->shader_state.GL_VARYING_COMPONENT_USE[x]);
        }
    }
    ETNA_COALESCE_STATE_CLOSE();
    /* end only EMIT_STATE */
    /**** Large dynamically-sized state ****/
    if(dirty & (ETNA_STATE_SHADER))
    {
        /* Special case: a new shader was loaded; simply re-load all uniforms and shader code at once */
        /*04000 or 0C000*/
        etna_set_state_multi(ctx, e->specs.vs_offset, e->shader_state.vs_inst_mem_size, e->shader_state.VS_INST_MEM);
        /*06000 or 0D000*/
        etna_set_state_multi(ctx, e->specs.ps_offset, e->shader_state.ps_inst_mem_size, e->shader_state.PS_INST_MEM);
        /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->shader_state.vs_uniforms_size, e->shader_state.VS_UNIFORMS);
        /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->shader_state.ps_uniforms_size, e->shader_state.PS_UNIFORMS);

        /* Copy uniforms to gpu3d, so that incremental updates to uniforms are possible as long as the
         * same shader remains bound */
        e->gpu3d.vs_uniforms_size = e->shader_state.vs_uniforms_size;
        e->gpu3d.ps_uniforms_size = e->shader_state.ps_uniforms_size;
        e->gpu3d.vs_inst_mem_size = e->shader_state.vs_inst_mem_size;
        e->gpu3d.ps_inst_mem_size = e->shader_state.ps_inst_mem_size;
        memcpy(e->gpu3d.VS_UNIFORMS, e->shader_state.VS_UNIFORMS, e->shader_state.vs_uniforms_size * 4);
        memcpy(e->gpu3d.PS_UNIFORMS, e->shader_state.PS_UNIFORMS, e->shader_state.ps_uniforms_size * 4);
        memcpy(e->gpu3d.VS_INST_MEM, e->shader_state.VS_INST_MEM, e->shader_state.vs_inst_mem_size * 4);
        memcpy(e->gpu3d.PS_INST_MEM, e->shader_state.PS_INST_MEM, e->shader_state.ps_inst_mem_size * 4);
    }
    else
    {
        /* If new uniforms loaded with current shader, only submit what changed */
        if(dirty & (ETNA_STATE_VS_UNIFORMS))
        {
            ETNA_COALESCE_STATE_OPEN(e->shader_state.vs_uniforms_size); /* worst case */
            for(int x=0; x<e->shader_state.vs_uniforms_size; ++x)
            {
                /*05000*/ EMIT_STATE(VS_UNIFORMS(x), VS_UNIFORMS[x], e->shader_state.VS_UNIFORMS[x]);
            }
            ETNA_COALESCE_STATE_CLOSE();
        }
        if(dirty & (ETNA_STATE_PS_UNIFORMS))
        {
            ETNA_COALESCE_STATE_OPEN(e->shader_state.ps_uniforms_size); /* worst case */
            for(int x=0; x<e->shader_state.ps_uniforms_size; ++x)
            {
                /*07000*/ EMIT_STATE(PS_UNIFORMS(x), PS_UNIFORMS[x], e->shader_state.PS_UNIFORMS[x]);
            }
            ETNA_COALESCE_STATE_CLOSE();
        }
    }
    /**** End of state update ****/
#undef EMIT_STATE
#undef EMIT_STATE_FIXP

    /* Everything synced to hardware; clear the dirty mask */
    e->dirty_bits = 0;
}

/** Rebuild an explicit GPU context after a context switch.
 *
 * Registered as callback (via etna_set_context_cb) and invoked by etna_flush
 * on kernel drivers that require an explicit context; it produces a command
 * buffer that restores the GPU to the current pipe state.
 */
static int update_context(void *pipe, struct etna_ctx *ctx, enum etna_pipe *initial_pipe, enum etna_pipe *final_pipe)
{
    /* The restore stream both starts and ends on the 3D pipe */
    *initial_pipe = *final_pipe = ETNA_PIPE_3D;
    /* Re-emit the complete current state */
    reset_context((struct pipe_context*) pipe);
    return ETNA_OK;
}

/*********************************************************************/

/** Tear down an etna pipe context. The pipe object must never be used
 * again after this call.
 */
static void etna_pipe_destroy(struct pipe_context *pipe)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
    /* Destroy sub-modules first, then the command stream context, and
     * finally the pipe object itself. */
    etna_pipe_clear_blit_destroy(pipe);
    etna_pipe_transfer_destroy(pipe);
    etna_free(ectx->ctx);
    FREE(pipe);
}

/** Main draw entry point. Draws primitives from the bound vertex buffers,
 * optionally using the bound index buffer, after synchronizing all dirty
 * state to the command stream.
 */
static void etna_pipe_draw_vbo(struct pipe_context *pipe,
                 const struct pipe_draw_info *info)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);

    /* Without a vertex layout there is nothing to draw */
    if(ectx->vertex_elements_p == NULL || ectx->vertex_elements.num_elements == 0)
        return;

    int num_prims = u_decomposed_prims_for_vertices(info->mode, info->count);
    if(unlikely(num_prims <= 0))
    {
        DBG("Invalid draw primitive mode=%i or no primitives to be drawn", info->mode);
        return;
    }

    /* Emit all dirty state before the DRAW_PRIMITIVES /
     * DRAW_INDEXED_PRIMITIVES command. */
    sync_context(pipe);

    /* Sanity check: vertex layout must match what the vertex shader expects */
    if(ectx->vs && ectx->vertex_elements.num_elements != ectx->vs->num_inputs)
    {
        BUG("Number of elements %i does not match the number of VS inputs %i",
                ectx->vertex_elements.num_elements, ectx->vs->num_inputs);
        return;
    }

    if(info->indexed)
    {
        etna_draw_indexed_primitives(ectx->ctx, translate_draw_mode(info->mode),
                info->start, num_prims, info->index_bias);
    }
    else
    {
        etna_draw_primitives(ectx->ctx, translate_draw_mode(info->mode),
                info->start, num_prims);
    }

    /* Debug aid: flush the command stream after every draw */
    if(DBG_ENABLED(ETNA_DBG_FLUSH_ALL))
        pipe->flush(pipe, NULL, 0);
}

/** Create vertex element states, which define a layout for fetching
 * vertices for rendering.
 */
static void *etna_pipe_create_vertex_elements_state(struct pipe_context *pipe,
                                      unsigned num_elements,
                                      const struct pipe_vertex_element *elements)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct compiled_vertex_elements_state *cs = CALLOC_STRUCT(compiled_vertex_elements_state);
    /* XXX could minimize number of consecutive stretches here by sorting, and
     * permuting the inputs in shader or does Mesa do this already? */

    /* Check that vertex element binding is compatible with hardware; thus
     * elements[idx].vertex_buffer_index are < stream_count. If not, the binding
     * uses more streams than is supported, and u_vbuf should have done some reorganization
     * for compatibility.
     */
    bool incompatible = false;
    for(unsigned idx=0; idx<num_elements; ++idx)
    {
        if(elements[idx].vertex_buffer_index >= priv->specs.stream_count ||
           elements[idx].instance_divisor > 0)
            incompatible = true;
    }
    cs->num_elements = num_elements;
    if(incompatible || num_elements == 0)
    {
        DBG("Error: zero vertex elements, or more vertex buffers used than supported");
        FREE(cs);
        return NULL;
    }
    unsigned start_offset = 0; /* start of current consecutive stretch */
    bool nonconsecutive = true; /* previous value of nonconsecutive */
    for(unsigned idx=0; idx<num_elements; ++idx)
    {
        unsigned element_size = util_format_get_blocksize(elements[idx].src_format);
        unsigned end_offset = elements[idx].src_offset + element_size;
        if(nonconsecutive)
            start_offset = elements[idx].src_offset;
        assert(element_size != 0 && end_offset <= 256); /* maximum vertex size is 256 bytes */
        /* check whether next element is consecutive to this one */
        nonconsecutive = (idx == (num_elements-1)) ||
                    elements[idx+1].vertex_buffer_index != elements[idx].vertex_buffer_index ||
                    end_offset != elements[idx+1].src_offset;
        cs->FE_VERTEX_ELEMENT_CONFIG[idx] =
                (nonconsecutive ? VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE : 0) |
                translate_vertex_format_type(elements[idx].src_format, false) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) |
                translate_vertex_format_normalize(elements[idx].src_format) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) |
                VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset);
    }
    return cs;
}

static void etna_pipe_bind_vertex_elements_state(struct pipe_context *pipe, void *ve)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    priv->dirty_bits |= ETNA_STATE_VERTEX_ELEMENTS;
    priv->vertex_elements_p = ve;
    if(ve)
        priv->vertex_elements = *(struct compiled_vertex_elements_state*)ve;
}

/** Destroy a compiled vertex elements state object. */
static void etna_pipe_delete_vertex_elements_state(struct pipe_context *pipe, void *ve)
{
    FREE(ve);
}

static void etna_pipe_set_blend_color(struct pipe_context *pipe,
                        const struct pipe_blend_color *bc)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct compiled_blend_color *cs = &priv->blend_color;
    cs->PE_ALPHA_BLEND_COLOR =
            VIVS_PE_ALPHA_BLEND_COLOR_R(etna_cfloat_to_uint8(bc->color[0])) |
            VIVS_PE_ALPHA_BLEND_COLOR_G(etna_cfloat_to_uint8(bc->color[1])) |
            VIVS_PE_ALPHA_BLEND_COLOR_B(etna_cfloat_to_uint8(bc->color[2])) |
            VIVS_PE_ALPHA_BLEND_COLOR_A(etna_cfloat_to_uint8(bc->color[3]));
    priv->dirty_bits |= ETNA_STATE_BLEND_COLOR;
}

/** Set stencil reference values. Only the REF fields are filled in here;
 * the remaining bits of these registers are weaved in from the
 * depth_stencil_alpha state.
 */
static void etna_pipe_set_stencil_ref(struct pipe_context *pipe,
                        const struct pipe_stencil_ref *sr)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
    struct compiled_stencil_ref *cs = &ectx->stencil_ref;

    ectx->stencil_ref_s = *sr; /* keep a copy of the original structure */

    cs->PE_STENCIL_CONFIG =
            VIVS_PE_STENCIL_CONFIG_REF_FRONT(sr->ref_value[0]);
    /* NOTE(review): the back-facing reference also uses ref_value[0], not
     * ref_value[1] -- confirm whether this is a hardware limitation or an
     * oversight before changing it. */
    cs->PE_STENCIL_CONFIG_EXT =
            VIVS_PE_STENCIL_CONFIG_EXT_REF_BACK(sr->ref_value[0]);
    ectx->dirty_bits |= ETNA_STATE_STENCIL_REF;
}

/** Set the multisample coverage mask. Only the MSAA enable bits are stored
 * here; the sample configuration itself is merged in from the render
 * target (framebuffer) state.
 */
static void etna_pipe_set_sample_mask(struct pipe_context *pipe,
                        unsigned sample_mask)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);

    ectx->sample_mask_s = sample_mask; /* keep raw mask around */

    ectx->sample_mask.GL_MULTI_SAMPLE_CONFIG =
            VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(sample_mask);
    ectx->dirty_bits |= ETNA_STATE_SAMPLE_MASK;
}

/** Set framebuffer (render target) state.
 *
 * Precomputes the PE (pixel engine) and TS (tile status / fast clear)
 * register values for the new color and depth/stencil buffers, configures
 * MSAA, and derives the framebuffer-sized scissor rectangle. The values are
 * emitted to the hardware later, when ETNA_STATE_FRAMEBUFFER is synced.
 */
static void etna_pipe_set_framebuffer_state(struct pipe_context *pipe,
                              const struct pipe_framebuffer_state *sv)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct compiled_framebuffer_state *cs = &priv->framebuffer;
    /* Sample count per buffer; -1 means "no such buffer bound" */
    int nr_samples_color = -1;
    int nr_samples_depth = -1;

    /* Set up TS as well. Warning: this state is used by both the RS and PE */
    uint32_t ts_mem_config = 0;
    if(sv->nr_cbufs > 0) /* at least one color buffer? */
    {
        struct etna_surface *cbuf = etna_surface(sv->cbufs[0]);
        /* NOTE(review): layout bit 0 appears to mean tiled, bit 1
         * supertiled, judging from the masks used here -- confirm against
         * the surface creation code. */
        bool color_supertiled = (cbuf->layout & 2)!=0;
        assert(cbuf->layout & 1); /* Cannot render to linear surfaces */
        pipe_surface_reference(&cs->cbuf, &cbuf->base);
        cs->PE_COLOR_FORMAT =
                VIVS_PE_COLOR_FORMAT_FORMAT(translate_rt_format(cbuf->base.format, false)) |
                (color_supertiled ? VIVS_PE_COLOR_FORMAT_SUPER_TILED : 0);
                /* XXX VIVS_PE_COLOR_FORMAT_OVERWRITE and the rest comes from blend_state / depth_stencil_alpha */
                /* merged with depth_stencil_alpha */
        if((cbuf->surf.offset & 63) || (((cbuf->surf.stride*4) & 63) && cbuf->surf.height > 4))
        {
            /* XXX Must make temporary surface here.
             * Need the same mechanism on gc2000 when we want to do mipmap generation by
             * rendering to levels > 1 due to multitiled / tiled conversion.
             */
            BUG("Alignment error, trying to render to offset %08x with tile stride %i",
                    cbuf->surf.offset, cbuf->surf.stride*4);
        }

        struct etna_resource *res = etna_resource(cbuf->base.texture);
        struct etna_bo *bo = res->bo;
        /* Single-pipe GPUs take one base address; dual pixel pipe GPUs use
         * a separate, precomputed address per pipe. */
        if (priv->ctx->conn->chip.pixel_pipes == 1)
        {
            cs->PE_COLOR_ADDR = etna_bo_gpu_address(bo) + cbuf->surf.offset;
        }
        else if (priv->ctx->conn->chip.pixel_pipes == 2)
        {
            cs->PE_PIPE_COLOR_ADDR[0] = res->pipe_addr[0];
            cs->PE_PIPE_COLOR_ADDR[1] = res->pipe_addr[1];
        }
        cs->PE_COLOR_STRIDE = cbuf->surf.stride;
        /* Enable fast clear through the tile status buffer, if the surface
         * has one allocated. */
        if(cbuf->surf.ts_size)
        {
            struct etna_bo *ts_bo = etna_resource(cbuf->base.texture)->ts_bo;
            ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR;
            cs->TS_COLOR_CLEAR_VALUE = cbuf->level->clear_value;
            cs->TS_COLOR_STATUS_BASE = etna_bo_gpu_address(ts_bo) + cbuf->surf.ts_offset;
            cs->TS_COLOR_SURFACE_BASE = etna_bo_gpu_address(bo) + cbuf->surf.offset;
        }
        /* MSAA */
        if(cbuf->base.texture->nr_samples > 1)
            ts_mem_config |= VIVS_TS_MEM_CONFIG_MSAA | translate_msaa_format(cbuf->base.format, false);
        nr_samples_color = cbuf->base.texture->nr_samples;
    } else {
        pipe_surface_reference(&cs->cbuf, NULL);
        cs->PE_COLOR_FORMAT = 0; /* Is this enough to render without color? */
    }

    if(sv->zsbuf != NULL)
    {
        struct etna_surface *zsbuf = etna_surface(sv->zsbuf);
        pipe_surface_reference(&cs->zsbuf, &zsbuf->base);
        assert(zsbuf->layout & 1); /* Cannot render to linear surfaces */
        uint32_t depth_format = translate_depth_format(zsbuf->base.format, false);
        /* D16 is the only 16bpp depth format; everything else uses 24 bits */
        unsigned depth_bits = depth_format == VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D16 ? 16 : 24;
        bool depth_supertiled = (zsbuf->layout & 2)!=0;
        cs->PE_DEPTH_CONFIG =
                depth_format |
                (depth_supertiled ? VIVS_PE_DEPTH_CONFIG_SUPER_TILED : 0) |
                VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_Z;
                /* VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH */
                /* merged with depth_stencil_alpha */
        struct etna_resource *res = etna_resource(zsbuf->base.texture);
        struct etna_bo *bo = res->bo;
        /* Same single/dual pixel pipe address split as for the color buffer */
        if (priv->ctx->conn->chip.pixel_pipes == 1)
        {
            cs->PE_DEPTH_ADDR = etna_bo_gpu_address(bo) + zsbuf->surf.offset;
        }
        else if (priv->ctx->conn->chip.pixel_pipes == 2)
        {
            cs->PE_PIPE_DEPTH_ADDR[0] = res->pipe_addr[0];
            cs->PE_PIPE_DEPTH_ADDR[1] = res->pipe_addr[1];
        }
        cs->PE_DEPTH_STRIDE = zsbuf->surf.stride;
        cs->PE_HDEPTH_CONTROL = VIVS_PE_HDEPTH_CONTROL_FORMAT_DISABLED;
        /* Normalization factor: 2^depth_bits - 1 (maximum raw depth value) */
        cs->PE_DEPTH_NORMALIZE = etna_f32_to_u32(exp2f(depth_bits) - 1.0f);
        /* Fast clear via the depth tile status buffer, if allocated */
        if(zsbuf->surf.ts_size)
        {
            struct etna_bo *ts_bo = etna_resource(zsbuf->base.texture)->ts_bo;
            ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR;
            cs->TS_DEPTH_CLEAR_VALUE = zsbuf->level->clear_value;
            cs->TS_DEPTH_STATUS_BASE = etna_bo_gpu_address(ts_bo) + zsbuf->surf.ts_offset;
            cs->TS_DEPTH_SURFACE_BASE = etna_bo_gpu_address(bo) + zsbuf->surf.offset;
        }
        ts_mem_config |= (depth_bits == 16 ? VIVS_TS_MEM_CONFIG_DEPTH_16BPP : 0);
        /* MSAA */
        if(zsbuf->base.texture->nr_samples > 1)
            /* XXX VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
             * Disable without MSAA for now, as it causes corruption in glquake. */
            ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION;
        nr_samples_depth = zsbuf->base.texture->nr_samples;
    } else {
        pipe_surface_reference(&cs->zsbuf, NULL);
        cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE;
    }

    /* MSAA setup */
    if(nr_samples_depth != -1 && nr_samples_color != -1 &&
        nr_samples_depth != nr_samples_color)
    {
        BUG("Number of samples in color and depth texture must match (%i and %i respectively)",
                nr_samples_color, nr_samples_depth);
    }
    /* The RA_MULTISAMPLE_UNK00E10 / RA_CENTROID_TABLE values below are magic
     * constants; presumably sample and centroid position tables -- taken
     * as-is, do not modify. */
    switch(MAX2(nr_samples_depth, nr_samples_color))
    {
    case 0:
    case 1: /* Are 0 and 1 samples allowed? */
        cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE;
        cs->msaa_mode = false;
        break;
    case 2:
        cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X;
        cs->msaa_mode = true; /* Add input to PS */
        cs->RA_MULTISAMPLE_UNK00E04 = 0x0;
        cs->RA_MULTISAMPLE_UNK00E10[0] = 0x0000aa22;
        cs->RA_CENTROID_TABLE[0] = 0x66aa2288;
        cs->RA_CENTROID_TABLE[1] = 0x88558800;
        cs->RA_CENTROID_TABLE[2] = 0x88881100;
        cs->RA_CENTROID_TABLE[3] = 0x33888800;
        break;
    case 4:
        cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X;
        cs->msaa_mode = true; /* Add input to PS */
        cs->RA_MULTISAMPLE_UNK00E04 = 0x0;
        cs->RA_MULTISAMPLE_UNK00E10[0] = 0xeaa26e26;
        cs->RA_MULTISAMPLE_UNK00E10[1] = 0xe6ae622a;
        cs->RA_MULTISAMPLE_UNK00E10[2] = 0xaaa22a22;
        cs->RA_CENTROID_TABLE[0] = 0x4a6e2688;
        cs->RA_CENTROID_TABLE[1] = 0x888888a2;
        cs->RA_CENTROID_TABLE[2] = 0x888888ea;
        cs->RA_CENTROID_TABLE[3] = 0x888888c6;
        cs->RA_CENTROID_TABLE[4] = 0x46622a88;
        cs->RA_CENTROID_TABLE[5] = 0x888888ae;
        cs->RA_CENTROID_TABLE[6] = 0x888888e6;
        cs->RA_CENTROID_TABLE[7] = 0x888888ca;
        cs->RA_CENTROID_TABLE[8] = 0x262a2288;
        cs->RA_CENTROID_TABLE[9] = 0x886688a2;
        cs->RA_CENTROID_TABLE[10] = 0x888866aa;
        cs->RA_CENTROID_TABLE[11] = 0x668888a6;
        break;
    }

    /* Scissor setup: full framebuffer, in 16.16 fixed point with inclusive
     * right/bottom edges (hence the -1). */
    cs->SE_SCISSOR_LEFT = 0; /* affected by rasterizer and scissor state as well */
    cs->SE_SCISSOR_TOP = 0;
    cs->SE_SCISSOR_RIGHT = (sv->width << 16)-1;
    cs->SE_SCISSOR_BOTTOM = (sv->height << 16)-1;

    cs->TS_MEM_CONFIG = ts_mem_config;

    priv->dirty_bits |= ETNA_STATE_FRAMEBUFFER;
    priv->framebuffer_s = *sv; /* keep copy of original structure */
}

/** Set the scissor rectangle. Only one scissor is supported, so start_slot
 * and num_scissors are ignored. This compiled state is only used when the
 * rasterizer state has scissoring enabled.
 */
static void etna_pipe_set_scissor_states( struct pipe_context *pipe,
                          unsigned start_slot,
                          unsigned num_scissors,
                          const struct pipe_scissor_state *ss)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
    struct compiled_scissor_state *cs = &ectx->scissor;
    ectx->scissor_s = *ss;
    /* Convert to 16.16 fixed point; right/bottom edges are inclusive */
    cs->SE_SCISSOR_LEFT = ss->minx << 16;
    cs->SE_SCISSOR_TOP = ss->miny << 16;
    cs->SE_SCISSOR_RIGHT = (ss->maxx << 16) - 1;
    cs->SE_SCISSOR_BOTTOM = (ss->maxy << 16) - 1;
    ectx->dirty_bits |= ETNA_STATE_SCISSOR;
}

static void etna_pipe_set_viewport_states( struct pipe_context *pipe,
                           unsigned start_slot,
                           unsigned num_scissors,
                           const struct pipe_viewport_state *vs)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    struct compiled_viewport_state *cs = &priv->viewport;
    priv->viewport_s = *vs;
    /**
     * For Vivante GPU, viewport z transformation is 0..1 to 0..1 instead of -1..1 to 0..1.
     * scaling and translation to 0..1 already happened, so remove that
     *
     * z' = (z * 2 - 1) * scale + translate
     *    = z * (2 * scale) + (translate - scale)
     *
     * scale' = 2 * scale
     * translate' = translate - scale
     */
    cs->PA_VIEWPORT_SCALE_X = etna_f32_to_fixp16(vs->scale[0]); /* must be fixp as v4 state deltas assume it is */
    cs->PA_VIEWPORT_SCALE_Y = etna_f32_to_fixp16(vs->scale[1]);
    cs->PA_VIEWPORT_SCALE_Z = etna_f32_to_u32(vs->scale[2] * 2.0f);
    cs->PA_VIEWPORT_OFFSET_X = etna_f32_to_fixp16(vs->translate[0]);
    cs->PA_VIEWPORT_OFFSET_Y = etna_f32_to_fixp16(vs->translate[1]);
    cs->PA_VIEWPORT_OFFSET_Z = etna_f32_to_u32(vs->translate[2] - vs->scale[2]);

    /* Compute scissor rectangle (fixp) from viewport.
     * Make sure left is always < right and top always < bottom.
     */
    cs->SE_SCISSOR_LEFT = etna_f32_to_fixp16(MAX2(vs->translate[0] - vs->scale[0], 0.0f));
    cs->SE_SCISSOR_TOP = etna_f32_to_fixp16(MAX2(vs->translate[1] - vs->scale[1], 0.0f));
    cs->SE_SCISSOR_RIGHT = etna_f32_to_fixp16(MAX2(vs->translate[0] + vs->scale[0], 0.0f));
    cs->SE_SCISSOR_BOTTOM = etna_f32_to_fixp16(MAX2(vs->translate[1] + vs->scale[1], 0.0f));
    if(cs->SE_SCISSOR_LEFT > cs->SE_SCISSOR_RIGHT)
    {
        uint32_t tmp = cs->SE_SCISSOR_RIGHT;
        cs->SE_SCISSOR_RIGHT = cs->SE_SCISSOR_LEFT;
        cs->SE_SCISSOR_LEFT = tmp;
    }
    if(cs->SE_SCISSOR_TOP > cs->SE_SCISSOR_BOTTOM)
    {
        uint32_t tmp = cs->SE_SCISSOR_BOTTOM;
        cs->SE_SCISSOR_BOTTOM = cs->SE_SCISSOR_TOP;
        cs->SE_SCISSOR_TOP = tmp;
    }

    cs->PE_DEPTH_NEAR = etna_f32_to_u32(0.0); /* not affected if depth mode is Z (as in GL) */
    cs->PE_DEPTH_FAR = etna_f32_to_u32(1.0);
    priv->dirty_bits |= ETNA_STATE_VIEWPORT;
}

static void etna_pipe_set_vertex_buffers( struct pipe_context *pipe,
                           unsigned start_slot,
                           unsigned num_buffers,
                           const struct pipe_vertex_buffer *vb)
{
    struct etna_pipe_context *priv = etna_pipe_context(pipe);
    assert((start_slot + num_buffers) <= PIPE_MAX_ATTRIBS);
    struct pipe_vertex_buffer zero_vb = {};
    for(unsigned idx=0; idx<num_buffers; ++idx)
    {
        unsigned slot = start_slot + idx; /* copy from vb[idx] to priv->...[slot] */
        const struct pipe_vertex_buffer *vbi = vb ? &vb[idx] : &zero_vb;
        struct compiled_set_vertex_buffer *cs = &priv->vertex_buffer[slot];
        assert(!vbi->user_buffer); /* XXX support user_buffer using etna_usermem_map */
        /* copy pipe_vertex_buffer structure and take reference */
        priv->vertex_buffer_s[slot].stride = vbi->stride;
        priv->vertex_buffer_s[slot].buffer_offset = vbi->buffer_offset;
        pipe_resource_reference(&priv->vertex_buffer_s[slot].buffer, vbi->buffer);
        priv->vertex_buffer_s[slot].user_buffer = vbi->user_buffer;
        /* determine addresses */
        viv_addr_t gpu_addr = 0;
        if(vbi->buffer) /* GPU buffer */
        {
            struct etna_bo *bo = etna_resource(vbi->buffer)->bo;
            gpu_addr = etna_bo_gpu_address(bo) + vbi->buffer_offset;
        }
        /* compiled state */
        cs->FE_VERTEX_STREAM_CONTROL = FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride);
        cs->FE_VERTEX_STREAM_BASE_ADDR = gpu_addr;

        etna_resource_touch(pipe, vbi->buffer);
    }

    priv->dirty_bits |= ETNA_STATE_VERTEX_BUFFERS;
}

/** Bind (or, with ib == NULL, unbind) the index buffer. Takes a reference
 * on the backing resource and precompiles the FE index stream registers.
 */
static void etna_pipe_set_index_buffer( struct pipe_context *pipe,
                         const struct pipe_index_buffer *ib)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
    struct compiled_set_index_buffer *cs = &ectx->index_buffer;
    if(ib != NULL)
    {
        assert(ib->buffer); /* XXX user_buffer using etna_usermem_map */
        /* Keep a referenced copy of the binding */
        pipe_resource_reference(&ectx->index_buffer_s.buffer, ib->buffer);
        ectx->index_buffer_s.index_size = ib->index_size;
        ectx->index_buffer_s.offset = ib->offset;
        ectx->index_buffer_s.user_buffer = ib->user_buffer;

        struct etna_bo *bo = etna_resource(ib->buffer)->bo;
        cs->FE_INDEX_STREAM_CONTROL = translate_index_size(ib->index_size);
        cs->FE_INDEX_STREAM_BASE_ADDR = etna_bo_gpu_address(bo) + ib->offset;

        etna_resource_touch(pipe, ib->buffer);
    }
    else
    {
        /* Unbind: drop the buffer reference and clear the registers */
        pipe_resource_reference(&ectx->index_buffer_s.buffer, NULL);
        cs->FE_INDEX_STREAM_CONTROL = 0;
        cs->FE_INDEX_STREAM_BASE_ADDR = 0;
    }
    ectx->dirty_bits |= ETNA_STATE_INDEX_BUFFER;
}

/** Flush the command stream to the GPU, optionally returning a fence handle
 * for the submitted work in *fence_out.
 */
static void etna_pipe_flush(struct pipe_context *pipe,
             struct pipe_fence_handle **fence_out,
             enum pipe_flush_flags flags)
{
    struct etna_pipe_context *ectx = etna_pipe_context(pipe);
    /* etna_flush reports the fence as a plain uint32_t; only request one
     * when the caller actually wants a fence handle back. */
    uint32_t fence_value;
    uint32_t *fence_ptr = (fence_out != NULL) ? &fence_value : NULL;
    if(etna_flush(ectx->ctx, fence_ptr) != ETNA_OK)
    {
        BUG("Error: etna_flush failed, GPU may be in unpredictable state");
    }
    if(fence_out)
        *fence_out = ETNA_FENCE_TO_PIPE_HANDLE(fence_value);

    /* Debug aid: wait for the GPU to go idle after every flush */
    if(DBG_ENABLED(ETNA_DBG_FINISH_ALL))
    {
        if(etna_finish(ectx->ctx) != ETNA_OK)
        {
            BUG("Error: etna_finish failed, GPU may be in unpredictable state");
            abort();
        }
    }
}

/** Set user clip plane state. Intentionally a no-op: the clip state is
 * ignored by this driver. */
static void etna_pipe_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *pcs)
{
    /* NOOP */
}

/** Set polygon stipple pattern. Intentionally a no-op: the stipple pattern
 * is ignored by this driver. */
static void etna_pipe_set_polygon_stipple(struct pipe_context *pctx,
		const struct pipe_poly_stipple *stipple)
{
    /* NOP */
}

/** Create a new etna pipe context for device dev with the given hardware
 * specs, owned by the given screen. Returns NULL on allocation or command
 * stream creation failure. The GPU is reset to a known initial state before
 * the context is returned.
 */
struct pipe_context *etna_new_pipe_context(struct viv_conn *dev, const struct etna_pipe_specs *specs, struct pipe_screen *screen, void *priv)
{
    struct etna_pipe_context *ectx = CALLOC_STRUCT(etna_pipe_context);
    if(ectx == NULL)
        return NULL;
    struct pipe_context *pc = &ectx->base;

    pc->priv = priv;
    pc->screen = screen;

    if(etna_create(dev, &ectx->ctx) < 0)
    {
        /* fix: free the pointer CALLOC_STRUCT returned, not the embedded
         * base member (previously FREE(pc)) */
        FREE(ectx);
        return NULL;
    }
    etna_set_context_cb(ectx->ctx, update_context, ectx);

    /* context state setup */
    ectx->dirty_bits = 0xffffffff; /* all state dirty: emit everything on first sync */
    ectx->conn = dev;
    ectx->specs = *specs;

    /*  Set sensible defaults for state */
    ectx->gpu3d.PA_W_CLIP_LIMIT = 0x34000001;
    ectx->gpu3d.GL_VERTEX_ELEMENT_CONFIG = 0x1;
    ectx->gpu3d.GL_API_MODE = VIVS_GL_API_MODE_OPENGL;
    ectx->gpu3d.RA_EARLY_DEPTH = 0x00000031; /* enable */

    /* fill in vtable entries one by one */
    pc->destroy = etna_pipe_destroy;
    pc->draw_vbo = etna_pipe_draw_vbo;
    /* XXX render_condition */
    /* XXX create_query */
    /* XXX destroy_query */
    /* XXX begin_query */
    /* XXX end_query */
    /* XXX get_query_result */
    pc->create_vertex_elements_state = etna_pipe_create_vertex_elements_state;
    pc->bind_vertex_elements_state = etna_pipe_bind_vertex_elements_state;
    pc->delete_vertex_elements_state = etna_pipe_delete_vertex_elements_state;
    pc->set_blend_color = etna_pipe_set_blend_color;
    pc->set_stencil_ref = etna_pipe_set_stencil_ref;
    pc->set_sample_mask = etna_pipe_set_sample_mask;
    pc->set_clip_state = etna_pipe_set_clip_state;
    pc->set_framebuffer_state = etna_pipe_set_framebuffer_state;
    pc->set_polygon_stipple = etna_pipe_set_polygon_stipple;
    pc->set_scissor_states = etna_pipe_set_scissor_states;
    pc->set_viewport_states = etna_pipe_set_viewport_states;
    pc->set_vertex_buffers = etna_pipe_set_vertex_buffers;
    pc->set_index_buffer = etna_pipe_set_index_buffer;
    /* XXX create_stream_output_target */
    /* XXX stream_output_target_destroy */
    /* XXX set_stream_output_targets */
    pc->flush = etna_pipe_flush;
    /* XXX create_video_decoder */
    /* XXX create_video_buffer */
    /* XXX create_compute_state */
    /* XXX bind_compute_state */
    /* XXX delete_compute_state */
    /* XXX set_compute_resources */
    /* XXX set_global_binding */
    /* XXX launch_grid */

    /* Initialize the sub-modules (each fills in its own vtable entries) */
    etna_pipe_blend_init(pc);
    etna_pipe_rasterizer_init(pc);
    etna_pipe_shader_init(pc);
    etna_pipe_surface_init(pc);
    etna_pipe_texture_init(pc);
    etna_pipe_transfer_init(pc);
    etna_pipe_zsa_init(pc);
    etna_pipe_clear_blit_init(pc);

    /* Reset GPU to initial state */
    reset_context(pc);
    return pc;
}

